load("Normal_GLM.rdata")
source("Normal_GLM.r")
## [1] 15.59637
## [1] 10.63484
## [1] 6.27827
## [1] 2.279274
## [1] 0.3134914
## [1] 0.00670344
## [1] 3.489446e-06
## [1] 5.542233e-13
## [1] 3.552714e-14
## [1] 1.563194e-12
## [1] 0
## (Intercept) x x2
## 11.45196 43.89866 -42.74408
## (Intercept) x x2
## 0.68391 17.95136 -17.74880
## [1] 0
library(carData)
library(car)
# (a)
xsquared = x^2
fit = lm(y~x+xsquared)
summary(fit)
##
## Call:
## lm(formula = y ~ x + xsquared)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9940 -0.6093 0.0508 0.5487 4.1157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.4348 0.5395 19.34 < 2e-16 ***
## x -27.1207 2.2597 -12.00 6.46e-16 ***
## xsquared 27.5193 2.0916 13.16 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.08 on 47 degrees of freedom
## Multiple R-squared: 0.793, Adjusted R-squared: 0.7842
## F-statistic: 90.01 on 2 and 47 DF, p-value: < 2.2e-16
# plot(fit)
# Test whether the quadratic term is significant. (The p-value is already in the summary printout; linearHypothesis() reproduces it as the equivalent F-test, with F = t^2.)
linearHypothesis(fit, "xsquared = 0")
## Linear hypothesis test
##
## Hypothesis:
## xsquared = 0
##
## Model 1: restricted model
## Model 2: y ~ x + xsquared
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 48 256.521
## 2 47 54.776 1 201.75 173.11 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Equivalent manual F-test (gives F of about 173.1, matching linearHypothesis above):
# fitH0 = lm(y~x)
# SSres = sum(fit$residuals^2)
# SSresH0 = sum(fitH0$residuals^2)
# F = ((SSresH0 - SSres) / (SSres)) * ((50-3)/(1))
# 1-pf(F, 1, 50-3)
# Plot the residuals and decide whether the data are homoscedastic. The spread does not look constant, so heteroscedasticity is plausible.
plot(fit$residuals)
# (b) Calculating the log-likelihood
beta1 = fit$coef / var(y) # eta_1 is mu_i / sigma_i^2; in the model from (a), sigma is constant, estimated by the sample variance of y
beta2 = c(1 / var(y), 0, 0) # eta_2 is 1 / sigma_i^2; in the model from (a), sigma is constant and does not depend on x at all
ones = rep(1, length(x))
X1 = cbind(ones, x, x^2)
X2 = cbind(ones, x, x^2)
lik(beta1, beta2, y, X1, X2)
## [1] -93.17752
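# Sanity check on the reparameterization (a sketch): eta_1/eta_2 = mu_i, so this
# parameterization should reproduce the fitted values of the lm() fit exactly:
# all.equal(as.vector((X1 %*% beta1) / (X2 %*% beta2)), as.vector(fitted(fit)))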
# gradient
Dlik(beta1, beta2, y, X1, X2) # gradient at the submodel fit; see the two explanations below
## (Intercept) x xsquared <NA> <NA>
## 2.273737e-13 1.421085e-13 5.684342e-14 1.076038e+02 5.057183e+01
## <NA>
## 2.345618e+01
# Two explanations. Submodel explanation: the model in part (a) is the submodel in which the x and x^2 coefficients of eta_2 are zero, with the transformations above for the betas, so its fit maximizes the likelihood only over that subspace.
# Gradient explanation: the gradient for eta_1 is essentially zero, but for eta_2 it is far from zero. The likelihood can still be improved by letting the variance estimates depend on x; we are currently at a maximum only within the class where sigma does not depend on x.
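# A small numerical illustration of the gradient explanation (with an ad-hoc step
# size): nudging beta2 along its own gradient block should raise the likelihood
# above the -93.18 computed above, e.g.:
# g = Dlik(beta1, beta2, y, X1, X2)
# lik(beta1, beta2 + 1e-6 * g[4:6], y, X1, X2)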
# (c) Optimizing
# Optimizing with Newton-Raphson
# Newton-Raphson with step halving; relies on the globals y, X1, X2
NewtonRaphsonOptim = function(currentBeta) {
  while (TRUE) {
    l = lik(currentBeta[1:3], currentBeta[4:6], y, X1, X2)
    Dl = Dlik(currentBeta[1:3], currentBeta[4:6], y, X1, X2)
    D2l = D2lik(currentBeta[1:3], currentBeta[4:6], y, X1, X2)
    # stop once the squared gradient norm is negligible
    if (sum(Dl^2) < 10^(-8)) {
      return(currentBeta)
    }
    # Newton step; halve it until the likelihood actually improves
    hm = -solve(D2l) %*% Dl
    newBeta = currentBeta + hm
    newl = lik(newBeta[1:3], newBeta[4:6], y, X1, X2)
    while (is.nan(newl) || newl <= l) {
      hm = hm / 2
      newBeta = currentBeta + hm
      newl = lik(newBeta[1:3], newBeta[4:6], y, X1, X2)
    }
    currentBeta = newBeta
  }
}
betaMLE = NewtonRaphsonOptim(c(beta1, beta2))
betaMLE
## [,1]
## (Intercept) 11.45196
## x 43.89866
## xsquared -42.74408
## 0.68391
## 17.95136
## -17.74880
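# By the stopping rule in NewtonRaphsonOptim, the gradient here is numerically zero:
# sum(Dlik(betaMLE[1:3], betaMLE[4:6], y, X1, X2)^2) # < 1e-8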
# Optimizing with optim()
optimized = optim(c(beta1, beta2),
                  function(betaC) { lik(betaC[1:3], betaC[4:6], y, X1, X2) },
                  gr = function(betaC) { Dlik(betaC[1:3], betaC[4:6], y, X1, X2) },
                  method = "BFGS", # a "quasi-Newton method (variable metric algorithm)" which takes advantage of the gradient
                  control = list(fnscale = -1)) # negative fnscale: maximize rather than minimize
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
# NOTE: whenever a trial step makes an entry of X2 %*% beta2 negative, log() returns NaN; optim() apparently discards those steps, which is what produces the warnings above
# optimized$par[4] + x*optimized$par[5] + x^2*optimized$par[6] # the estimated beta2 still makes sense: eta_2 stays positive over the data
optimized
## $par
## (Intercept) x xsquared
## 11.452782 43.152789 -42.015401 0.686633 17.779954 -17.579979
##
## $value
## [1] -38.14069
##
## $counts
## function gradient
## 175 100
##
## $convergence
## [1] 1
##
## $message
## NULL
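# $convergence == 1 means optim() hit its iteration limit rather than meeting its
# convergence test; a larger limit, e.g. control=list(fnscale=-1, maxit=1000),
# should bring the BFGS estimates closer to the Newton-Raphson MLE above.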
# Plot the data against predictions from the standard linear model (pink) and the GLM (blue)
plot(x, y, col = "black", ylim = c(0, 16))
predictedFromSLM = fit$coef[1] + fit$coef[2]*x + fit$coef[3]*(x^2)
points(x, predictedFromSLM, col = "#CC79A7") # pink
estimatedSigmaSq = 1/(betaMLE[4] + betaMLE[5]*x + betaMLE[6]*(x^2)) # sigma_i^2 = 1/eta_2i
predictedFromGLM = (betaMLE[1] + betaMLE[2]*x + betaMLE[3]*(x^2)) * estimatedSigmaSq # mu_i = eta_1i * sigma_i^2
points(x, predictedFromGLM, col = "#0072B2") # blue
residualsGLM = predictedFromGLM - y # note the sign convention: fitted minus observed
plot(x, residualsGLM, ylim=c(-1.5,1.5))
#lines(x, residualsGLM+sd(residualsGLM))
#lines(x, residualsGLM-sd(residualsGLM))
#lines(x, residualsGLM+sqrt(estimatedSigmaSq), col="#0072B2")
#lines(x, residualsGLM-sqrt(estimatedSigmaSq), col="#0072B2")
lines(x, sqrt(estimatedSigmaSq), col="#0072B2")
lines(x, -sqrt(estimatedSigmaSq), col="#0072B2")
sqrt(estimatedSigmaSq) + residualsGLM # residual plus one estimated sd; negative entries mark points more than one sd above the fitted mean
## [1] 0.549529631 1.174919419 1.384026344 0.267883203 0.279101027
## [6] 1.024177882 0.696313827 0.003416554 0.057145592 0.500768728
## [11] -0.194700852 0.538667581 0.399879820 0.892568680 0.245980870
## [16] 1.267327223 1.055431430 0.463237299 0.574406300 -0.131822446
## [21] 0.153859895 0.830536622 0.339394687 1.161379297 0.304933259
## [26] -0.094316573 0.601009163 0.718680660 0.572312482 1.545794462
## [31] 0.615642275 0.508539597 -0.124906773 -0.076105326 -0.039963368
## [36] 0.816250108 -0.257427489 -0.046640508 1.438289093 0.741633714
## [41] -0.310608441 0.113386481 0.297734621 1.115963483 -0.074063299
## [46] 0.142160212 1.550847205 1.566033415 1.503599687 0.127938509
residualsSLM = predictedFromSLM - y
plot(x, residualsSLM, ylim=c(-5,4))
#lines(x, residualsSLM+1.08)
#lines(x, residualsSLM-1.08)
lines(x, rep(1.08, length(x)), col="#0072B2") # +/- the residual standard error from summary(fit)
lines(x, rep(-1.08, length(x)), col="#0072B2")
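# Unlike the GLM bands above, this constant +/- 1.08 band cannot track spread
# that changes with x.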
# (d)
# test for quadratic term in eta_1
# Same Newton-Raphson routine, restricted: the x^2 column is dropped from X1 (no quadratic term in eta_1)
NROptimNoQuadEta1 = function(currentBeta) {
  while (TRUE) {
    l = lik(currentBeta[1:2], currentBeta[3:5], y, X1[,1:2], X2)
    Dl = Dlik(currentBeta[1:2], currentBeta[3:5], y, X1[,1:2], X2)
    D2l = D2lik(currentBeta[1:2], currentBeta[3:5], y, X1[,1:2], X2)
    if (sum(Dl^2) < 10^(-8)) {
      return(currentBeta)
    }
    hm = -solve(D2l) %*% Dl
    newBeta = currentBeta + hm
    newl = lik(newBeta[1:2], newBeta[3:5], y, X1[,1:2], X2)
    while (is.nan(newl) || newl <= l) {
      hm = hm / 2
      newBeta = currentBeta + hm
      newl = lik(newBeta[1:2], newBeta[3:5], y, X1[,1:2], X2)
    }
    currentBeta = newBeta
  }
}
H0betaMLE = NROptimNoQuadEta1(c(betaMLE[1:2], betaMLE[4:6]))
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
H0betaMLE
## [,1]
## [1,] 14.912839174
## [2,] 0.002674225
## [3,] 1.118181109
## [4,] 9.949501118
## [5,] -9.914566856
# Likelihood-ratio test: dev0 - dev1 is asymptotically chi-squared(1) under H0 (one restriction)
dev1 = -2*lik(betaMLE[1:3], betaMLE[4:6], y, X1, X2)
dev0 = -2*lik(H0betaMLE[1:2], H0betaMLE[3:5], y, X1[,1:2], X2)
1-pchisq(dev0 - dev1, df=1)
## [1] 0.0002425917
dev0-dev1
## [1] 13.46858
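# For reference, 13.47 is far beyond the 5% critical value qchisq(0.95, df=1) = 3.84,
# so the quadratic term in eta_1 is clearly needed.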
# test for quadratic term in eta_2
# As above, but with the x^2 column dropped from X2 (no quadratic term in eta_2)
NROptimNoQuadEta2 = function(currentBeta) {
  while (TRUE) {
    l = lik(currentBeta[1:3], currentBeta[4:5], y, X1, X2[,1:2])
    Dl = Dlik(currentBeta[1:3], currentBeta[4:5], y, X1, X2[,1:2])
    D2l = D2lik(currentBeta[1:3], currentBeta[4:5], y, X1, X2[,1:2])
    if (sum(Dl^2) < 10^(-8)) {
      return(currentBeta)
    }
    hm = -solve(D2l) %*% Dl
    newBeta = currentBeta + hm
    newl = lik(newBeta[1:3], newBeta[4:5], y, X1, X2[,1:2])
    while (is.nan(newl) || newl <= l) {
      hm = hm / 2
      newBeta = currentBeta + hm
      newl = lik(newBeta[1:3], newBeta[4:5], y, X1, X2[,1:2])
    }
    currentBeta = newBeta
  }
}
H0betaMLE = NROptimNoQuadEta2(c(betaMLE[1:3], betaMLE[4:5]))
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
## Warning in log(X2 %*% beta2): NaNs produced
H0betaMLE
## [,1]
## [1,] 9.9645446
## [2,] -25.5552970
## [3,] 25.0106087
## [4,] 0.9747917
## [5,] -0.1166871
dev1 = -2*lik(betaMLE[1:3], betaMLE[4:6], y, X1, X2)
dev0 = -2*lik(H0betaMLE[1:3], H0betaMLE[4:5], y, X1, X2[,1:2])
1-pchisq(dev0 - dev1, df=1)
## [1] 1.110223e-16
# Test the full GLM against the SLM from part (a): two restrictions on eta_2, hence df = 2
dev1 = -2*lik(betaMLE[1:3], betaMLE[4:6], y, X1, X2)
dev0 = -2*lik(beta1, beta2, y, X1, X2)
1-pchisq(dev0 - dev1, df=2)
## [1] 0
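# Both p-values above have hit the limits of double precision; for tail
# probabilities this small, pchisq(dev0 - dev1, df=..., lower.tail=FALSE)
# avoids the cancellation in 1 - pchisq(...) and reports the actual magnitude.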
# (e)
predictionSimulation = function(xVal, numTrials) {
  mseSLMmean = 0
  mseGLMmean = 0
  mseSLMsd = 0
  mseGLMsd = 0
  # Treat the fitted model from (c) as the truth at the observed design points...
  trueVariances = estimatedSigmaSq
  trueMeans = predictedFromGLM
  # ...and also at the prediction point xVal
  modelVar = 1/(betaMLE[4] + betaMLE[5]*xVal + betaMLE[6]*(xVal^2))
  modelSd = sqrt(modelVar)
  modelMean = (betaMLE[1] + betaMLE[2]*xVal + betaMLE[3]*(xVal^2)) * modelVar
  for (trial in 1:numTrials) {
    # simulate one response per design point (n = 50) and refit both models
    simulatedY = rnorm(50, mean=trueMeans, sd=sqrt(trueVariances))
    newSLMfit = lm(simulatedY~x+xsquared)
    newSLMPrediction = newSLMfit$coef[[1]] + newSLMfit$coef[[2]]*xVal + newSLMfit$coef[[3]]*xVal^2
    newSLMsd = sd(newSLMfit$residuals)
    newBeta1 = newSLMfit$coef / var(simulatedY)
    newBeta2 = c(1 / var(simulatedY), 0, 0)
    # NB: lik() inside NewtonRaphsonOptim reads the global y, not simulatedY,
    # so this "refit" re-optimizes the original data and returns betaMLE every time
    newBetaMLE = NewtonRaphsonOptim(c(newBeta1, newBeta2))
    newGLMEstVariance = 1/(newBetaMLE[4] + newBetaMLE[5]*xVal + newBetaMLE[6]*(xVal^2))
    newPredictedY = (newBetaMLE[1] + newBetaMLE[2]*xVal
                     + newBetaMLE[3]*(xVal^2)) * newGLMEstVariance
    newGLMsd = sqrt(newGLMEstVariance)
    # accumulate squared errors against the model values at xVal
    mseSLMmean = mseSLMmean + (newSLMPrediction - modelMean)^2
    mseGLMmean = mseGLMmean + (newPredictedY - modelMean)^2
    mseSLMsd = mseSLMsd + (newSLMsd - modelSd)^2
    mseGLMsd = mseGLMsd + (newGLMsd - modelSd)^2
  }
  # average over trials to get Monte Carlo MSEs
  mseSLMmean = (1/numTrials) * mseSLMmean
  mseGLMmean = (1/numTrials) * mseGLMmean
  mseSLMsd = (1/numTrials) * mseSLMsd
  mseGLMsd = (1/numTrials) * mseGLMsd
  return(c(mseSLMmean, mseGLMmean, mseSLMsd, mseGLMsd))
}
predictionSimulation(0.75, 100)
## [1] 4.779416e-01 3.737011e-16 3.058965e-01 7.203965e-13
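# The machine-precision GLM "MSEs" follow from the NB above: each refit reproduces
# betaMLE exactly, so the GLM predictions coincide with the model values at xVal.
# Threading simulatedY through the optimizer would give an honest GLM-vs-SLM
# comparison; the SLM columns already show its mean and sd estimates carry real error.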